import requests
from bs4 import BeautifulSoup
def get_daum_news_title(news_id, *, timeout=10):
    """Return the title text of a Daum news article page.

    Downloads ``https://news.v.daum.net/v/<news_id>`` and returns the text of
    the first ``h3.tit_view`` element found in the HTML.

    Args:
        news_id: Article identifier substituted into the article URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The element's text exactly as ``get_text()`` produces it, or ``""``
        when the page contains no ``h3.tit_view`` element. The HTTP status is
        not checked, so an error page without that element also yields ``""``.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    url = 'https://news.v.daum.net/v/{}'.format(news_id)
    resp = requests.get(url, timeout=timeout)
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning about guessing.
    soup = BeautifulSoup(resp.text, 'html.parser')
    title_tag = soup.select_one('h3.tit_view')
    if title_tag is None:
        return ""
    return title_tag.get_text()
# Try the title scraper on two sample article ids; the results are discarded.
for _news_id in ('20190728165812603', '20190801114158041'):
    get_daum_news_title(_news_id)
def get_daum_news_content(news_id, *, timeout=10):
    """Return the concatenated paragraph text of a Daum news article page.

    Downloads ``https://news.v.daum.net/v/<news_id>`` and joins the text of
    every ``<p>`` element located under ``div#harmonyContainer``.

    Args:
        news_id: Article identifier substituted into the article URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The paragraph texts joined with no separator, in document order, or
        ``''`` when no matching paragraph exists. The HTTP status is not
        checked, so an error page without matching paragraphs yields ``''``.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    url = 'https://news.v.daum.net/v/{}'.format(news_id)
    resp = requests.get(url, timeout=timeout)
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning about guessing.
    soup = BeautifulSoup(resp.text, 'html.parser')
    paragraphs = soup.select('div#harmonyContainer p')
    # str.join builds the result in one pass instead of repeated `+=`.
    return ''.join(p.get_text() for p in paragraphs)
# Try the body scraper on two sample article ids; the results are discarded.
for _news_id in ('20190728165812603', '20190801114158041'):
    get_daum_news_content(_news_id)
# One-off request against the comment API for a single article: ten comments
# starting at offset 43, sorted by RECOMMEND. The response is kept in the
# module-level name `resp`.
url = 'https://comment.daum.net/apis/v1/posts/@20190728165812603/comments?parentId=0&offset=43&limit=10&sort=RECOMMEND&isInitial=false'
# NOTE(review): the bearer token below is a credential committed to source.
# Its JWT payload appears to carry exp=1564678645 (August 2019), so it has
# presumably expired -- confirm and load a fresh token from configuration.
headers = {
    'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJncmFudF90eXBlIjoiYWxleF9jcmVkZW50aWFscyIsInNjb3BlIjpbXSwiZXhwIjoxNTY0Njc4NjQ1LCJhdXRob3JpdGllcyI6WyJST0xFX0NMSUVOVCJdLCJqdGkiOiJlZGUxNzM0MS1hNWNjLTRmYmQtODJkMy0zZTMwOGMwMGViZTEiLCJjbGllbnRfaWQiOiIyNkJYQXZLbnk1V0Y1WjA5bHI1azc3WTgifQ.Cxs2g1hUUAjyuSrUDAhaKGol8vvyW-_mwPtV0X0DvEU',
    'Origin': 'https://news.v.daum.net',
    'Referer': 'https://news.v.daum.net/v/20190728165812603',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}
# A timeout keeps an unresponsive server from hanging the module while it loads.
resp = requests.get(url, headers=headers, timeout=10)
def get_daum_news_comments(news_id, *, token=None, timeout=10):
    """Collect every top-level comment page the comment API returns for an article.

    Requests ``https://comment.daum.net/apis/v1/posts/@<news_id>/comments``
    with ``parentId=0`` and ``sort=RECOMMEND``, ten items at a time, advancing
    ``offset`` until the API answers with an empty list.

    Args:
        news_id: Article identifier used in the API path and the Referer header.
        token: Optional bearer token for the ``Authorization`` header. When
            omitted, the token that was originally embedded in this function
            is used.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A list holding the items of every page, in the order received.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If a response body is not a JSON list. Previously such a
            body (for example an error object) was truthy, so its keys were
            appended to the result and the loop never terminated.
    """
    # NOTE(review): credential committed to source. Its JWT payload appears to
    # carry exp=1564678645 (August 2019), so it has presumably expired --
    # confirm, and pass a fresh value through `token`.
    default_token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJncmFudF90eXBlIjoiYWxleF9jcmVkZW50aWFscyIsInNjb3BlIjpbXSwiZXhwIjoxNTY0Njc4NjQ1LCJhdXRob3JpdGllcyI6WyJST0xFX0NMSUVOVCJdLCJqdGkiOiJlZGUxNzM0MS1hNWNjLTRmYmQtODJkMy0zZTMwOGMwMGViZTEiLCJjbGllbnRfaWQiOiIyNkJYQXZLbnk1V0Y1WjA5bHI1azc3WTgifQ.Cxs2g1hUUAjyuSrUDAhaKGol8vvyW-_mwPtV0X0DvEU'
    page_size = 10
    headers = {
        'Authorization': 'Bearer {}'.format(token or default_token),
        'Origin': 'https://news.v.daum.net',
        # Point the Referer at the article actually being queried rather than
        # one fixed article id.
        'Referer': 'https://news.v.daum.net/v/{}'.format(news_id),
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    }
    url_template = 'https://comment.daum.net/apis/v1/posts/@{}/comments?parentId=0&offset={}&limit={}&sort=RECOMMEND&isInitial=false'

    comments = []
    offset = 0
    while True:
        url = url_template.format(news_id, offset, page_size)
        resp = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of treating an error body as comments.
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, list):
            raise ValueError(
                'unexpected comment API response at offset {}: {!r}'.format(offset, data)
            )
        if not data:
            # An empty page means there is nothing left to fetch.
            break
        comments.extend(data)
        offset += page_size
    return comments
# Count the comments fetched for two sample article ids; the counts are discarded.
for _news_id in ('20190728165812603', '20190801114158041'):
    len(get_daum_news_comments(_news_id))